source("src/rules/scripts/functions.R")

################
# Theme colors #
################

# Colors for heatmap (from the ArchR package)
blueYellow <- c("#352A86", "#343DAE", "#0262E0", "#1389D2", "#2DB7A3",
    "#A5BE6A", "#F8BA43", "#F6DA23", "#F8FA0D")

# Colors for statistics
fastqc_colors <- c("#e31a1c", "#238443", "#ec7014")

star_colors <- c("#737373", "#8c6bb1", "#ec7014",
                 "#e31a1c", "#238443", "#225ea8")

Background

This documents the analysis of the effect of cytokine treatment on PTPN2 KO mice. This document aims to analyze differences between PTPN2 KO and WT mice in their response to cytokine treatment.

# Load in the files
###############
# Count table #
###############
# Read in the count table
count_table <- read.table(params$counts, sep = "\t", row.names = 1, header = T)

################
# Sample table #
################

# Read in the sample table
sample_table <- read.table(params$sample_info, sep = ",", header = T,
                           row.names = 1)

# Ensure the count table and sample table are in the same order
count_table <- count_table[ , rownames(sample_table)]

########
# Star #
########

# Read in star stats
star_stats <- read.table(params$star_stats, sep = "\t", row.names = 1,
                         header = T)

# Filter for mapping % and total read counts
star_stats <- star_stats[grepl(
  "%|Uniquely mapped reads|mapped to multiple loci",
  rownames(star_stats)), ]

star_stats <- star_stats[!grepl(
  "^Mismatch|chimeric", rownames(star_stats)), ]

# Calculate median size
star_stats_median <- star_stats[grepl(
  "Uniquely mapped reads", rownames(star_stats)), ]

median_size <- median(as.numeric(
  star_stats_median["Uniquely mapped reads number", ])) / 1000000

median_rate <- median(as.numeric(str_remove(
  star_stats_median["Uniquely mapped reads %", ], "%")))

alignment_qual <- get_alignment_qual(median_rate)

# Pull out all sample names
samples <- colnames(star_stats)

# Make a column of metrics
star_stats$metric <- rownames(star_stats)

# Make into long form for ggplot
star_stats_long <- gather(star_stats, key = "Sample", value = "value",
                          all_of(samples))


# Add column for value type
star_stats_long <- star_stats_long %>%
  mutate(
    value = as.numeric(str_remove(value, "%")),
    val_type = ifelse(grepl("%", metric),
                      "pct", "int")
  )

# Pull out read counts
# Calculate median library size
read_counts <- star_stats_long %>%
  filter(grepl("mapped reads number", metric)) %>%
  mutate(value = round(value / 1000000)) %>%
  mutate(value = str_c(value, " million"))

##########
# FastQC #
##########

# Load in fastqc results
fastqc_summary <- read.table(params$fastqc, sep = "\t")

# Change the colnames
colnames(fastqc_summary) <- c("Result", "Test", "Sample")

# Update sample names
fastqc_summary$Sample <- sub("_S[0-9]*_L[0-9]*_", "", fastqc_summary$Sample)
fastqc_summary$Sample <- sub("_001.fastq.gz", "", fastqc_summary$Sample)

##############
# Set colors #
##############
sample_table[[params$sample_column]] <- 
  factor(sample_table[[params$sample_column]])
sample_colors <- brewer.pal(length(
  levels(sample_table[[params$sample_column]])), "Set1")
names(sample_colors) <- levels(sample_table[[params$sample_column]])

Quality Control

FastQC Summary

FastQC was used to assess the quality of each fastq file. A summary of the results is shown below, the overall library quality was ****.

fastqc_plot <- ggplot(data = fastqc_summary,
                      mapping = aes(x = Sample,
                                    y = Test,
                                    fill = Result)) +
  geom_tile(color = "white", size = 0.5) +
  scale_fill_manual(values = fastqc_colors) +
  theme_classic() +
  theme(
    legend.title = element_blank(),
    legend.text  = element_text(size = 8, color = "black"),
    axis.title   = element_blank(),
    axis.text    = element_text(size = 8, color = "black"),
    axis.text.x  = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

fastqc_plot

Alignment Summary

Reads were aligned to the GRCm38 genome using the STAR RNA-seq aligner. A summary of the results is shown below, the library size is displayed on each bar. The median library size was 64.9422335 million reads and the median alignment rate was 92.36. Overall, the alignment rate was high.

# Plot alignment stats
star_stats_long_pct <- star_stats_long %>%
  filter(val_type == "pct") %>%
  
  # Add median rates to metric label
  mutate(
    metric = str_remove(metric, "% of reads | reads %"),
    metric = str_to_sentence(metric)
  ) %>%
  arrange(value) %>%
  mutate(metric = fct_inorder(metric))
  
# Plot STAR alignment stats
star_plot <- ggplot(data = star_stats_long_pct,
                    mapping = aes(x = Sample,
                                  y = value,
                                  fill = metric)) +
  geom_bar(stat = "identity", color = "white", size = 0.5) +
  
  # Label bars with library size
  geom_text(
    data = read_counts, 
    aes(Sample, 50, label = value),
    show.legend = F,
    inherit.aes = F,
    color = "white", 
    angle = 90
  ) +
  
  scale_fill_manual(
    values = star_colors,
    guide  = guide_legend(reverse = T)
  ) +
  scale_y_continuous(labels = function(x) {str_c(x, "%")}) +
  theme_classic() +
  theme(
    legend.title = element_blank(),
    legend.text  = element_text(size = 8, color = "black"),
    axis.title   = element_blank(),
    axis.text    = element_text(size = 8, color = "black"),
    axis.text.x  = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

star_plot

PCA

Principal component analysis was performed to assess similarities between samples. Biological replicates were plotted for each experimental condition. When replicates cluster close together, this suggests that overall gene expression patterns are fairly reproducible.

# This makes a DESeq2 object where the count data is our count matrix and the colData is our sample data. I am currently making the design based on "group" but we can change this if necessary
sample_table$genotype <- factor(sample_table$genotype,
                                levels = c("PTPN2_KO", "WT"))
sample_table$treatment <- factor(sample_table$treatment,
                                 levels = c("Cytokine_Treatment", "No_Treatment"))
sample_table$group <- factor(sample_table$group,
                                levels = c("PTPN2_KO_Cytokine_Treatment",
                                           "PTPN2_KO_NoTreatment",
                                           "WT_Cytokine_Treatment",
                                           "WT_NoTreatment"))
dds <- DESeqDataSetFromMatrix(countData = count_table,
                              colData = sample_table,
                              #design = formula(paste("~",
                              #                       params$sample_column))
                              design = ~genotype + treatment + 
                                genotype:treatment)

# Here we get rid of all the genes that are not expressed
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
dds <- DESeq(dds)

# Create a model
mod_mat <- model.matrix(design(dds), colData(dds))

PTPN2_non <- colMeans(mod_mat[dds$genotype == "PTPN2_KO" &
                             dds$treatment == "No_Treatment", ])
PTPN2_Cyto <- colMeans(mod_mat[dds$genotype == "PTPN2_KO" &
                              dds$treatment == "Cytokine_Treatment", ])
WT_Non <- colMeans(mod_mat[dds$genotype == "WT" &
                             dds$treatment == "No_Treatment", ])
WT_Cyto <- colMeans(mod_mat[dds$genotype == "WT" &
                              dds$treatment == "Cytokine_Treatment", ])

# This transforms the counts to have constant variance. This helps ensue that highly or lowly expressed genes will not contribute too much for the PCA or clustering analysis
vsd <- vst(dds, blind = F)

plot_pca(vsd, group_by = params$sample_column, color_palette = sample_colors)

Sample Distances

Correlations between samples was determined to further assess similarities between samples. Biological replicates should cluster together. Samples that are more similar to each other have values closer to 0.

# This determines distance between samples.
get_distances(vsd)

Differential Expression Analysis

PTPN2_WT_noTrt

# Extract DE genes
if(interaction){
  sig_genes <- get_de(dds, de_test = de_test, interaction = interaction,
                      var1 = treatment, var2 = control,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, de_test = de_test,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  
  sig_genes_all <- get_de(dds, de_test = de_test, interaction = interaction,
                          var1 = treatment, var2 = control,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
} else {
  sig_genes <- get_de(dds, column = params$sample_column, var1 = treatment,
                      var2 = control, interaction = interaction,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, column = params$sample_column,
                               var1 = control, var2 = treatment,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  all_genes_all <- get_de(dds, column = params$sample_column, var1 = treatment,
                          var2 = control, interaction = interaction,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
}
DE_genes_n <- nrow(sig_genes$DE_genes)

DEseq2 analysis

126 differentially expressed genes were identified using the DESeq2 package.

Differentially expressed genes are labeled on the MA plot in blue.

plotMA(sig_genes$DE_df, main = sect_title)

Heatmaps

Heatmap of DE genes across all samples

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = TRUE,
                        output_dir = heatmap_dir,
                        color_test = sample_colors,
                        save_name = sect_title)
print(heatmap)

Heatmap of DE genes across only the samples used in the comparision

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = FALSE,
                        plot_groups = c(treatment, control),
                        color_test = sample_colors)
print(heatmap)

Gene Set Enrichment

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

evcode <- TRUE

result <- run_gprofiler_all(gene_table = sig_genes$DE_genes,
                            pos_name = treatment,
                            neg_name = control,
                            custom_bg = all_gene_ens,
                            save_dir = file.path(params$output_dir, "GSE_files"),
                            plot_dir = file.path(params$output_dir, "images",
                                                 "GSE"),
                            save_name = sect_title,
                            evcodes = evcode)

plots <- result$plots
gost_table <- result[[paste0(treatment, "_vs_", control)]]$result

save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG")

gost_file <- openxlsx::createWorkbook()

invisible(lapply(save_gost, function(x){
  save_table <- gost_table %>%
    dplyr::filter(source == x)
    
  if(evcode){
    save_table <- save_table %>%
      dplyr::select(!c(intersection, evidence_codes))

  }
  sheetname <- sub(":", "_", x)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}))

openxlsx::saveWorkbook(wb = gost_file,
                       file = paste0(params$output_dir, "/GSE_files/", 
                                     sect_title, 
                                     "_GSE.xlsx"),
                       overwrite = TRUE)

plot_grid(plots$GOBP, plots$GOMF, plots$GOCC, plots$KEGG,
          nrow = 4,
          ncol = 1,
          align = "v",
          axis = "tb")

Gene Set Enrichment Separating Up and Down Regulation

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

ifelse(!dir.exists(file.path(params$output_dir, "GSE_files_separated")),
       dir.create(file.path(params$output_dir, "GSE_files_separated")),
       FALSE)
[1] FALSE
result <- run_gprofiler(gene_table = sig_genes$DE_genes,
                        pos_name = treatment,
                        neg_name = control,
                        custom_bg = all_gene_ens,
                        save_dir = file.path(params$output_dir,
                                             "GSE_files_separated"),
                        plot_dir = file.path(params$output_dir, "images",
                                             "GSE"),
                        evcodes = FALSE)

plots <- result$plots
gost_table_trt <- result[[treatment]]$result
gost_table_ctl <- result[[control]]$result


save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG", "WP")

gost_file_trt <- openxlsx::createWorkbook()

gost_file_ctl <- openxlsx::createWorkbook()

add_gost_data <- function(gost_table, gost_file, source_test,
                          evcode = FALSE){
  save_table <- gost_table %>%
    dplyr::filter(source == source_test)
  
  if(evcode){
    save_table <- save_table %>%
          dplyr::select(!c(intersection, evidence_codes))
  }

  sheetname <- sub(":", "_", source_test)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}

invisible(lapply(save_gost, function(x){
  add_gost_data(gost_table_trt, gost_file_trt, x, evcode = FALSE)
  add_gost_data(gost_table_ctl, gost_file_ctl, x, evcode = FALSE)
}))

openxlsx::saveWorkbook(wb = gost_file_trt,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", treatment, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

openxlsx::saveWorkbook(wb = gost_file_ctl,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", control, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

plot_grid(plots$C_GOBP, plots$T_GOBP,
          plots$C_GOMF, plots$T_GOMF,
          plots$C_GOCC, plots$T_GOCC,
          plots$C_KEGG, plots$T_KEGG,
          nrow = 8,
          ncol = 1,
          align = "v",
          axis = "tb")

WT_PTPN2_noTrt

# Extract DE genes
if(interaction){
  sig_genes <- get_de(dds, de_test = de_test, interaction = interaction,
                      var1 = treatment, var2 = control,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, de_test = de_test,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  
  sig_genes_all <- get_de(dds, de_test = de_test, interaction = interaction,
                          var1 = treatment, var2 = control,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
} else {
  sig_genes <- get_de(dds, column = params$sample_column, var1 = treatment,
                      var2 = control, interaction = interaction,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, column = params$sample_column,
                               var1 = control, var2 = treatment,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  all_genes_all <- get_de(dds, column = params$sample_column, var1 = treatment,
                          var2 = control, interaction = interaction,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
}
DE_genes_n <- nrow(sig_genes$DE_genes)

DEseq2 analysis

126 differentially expressed genes were identified using the DESeq2 package.

Differentially expressed genes are labeled on the MA plot in blue.

plotMA(sig_genes$DE_df, main = sect_title)

Heatmaps

Heatmap of DE genes across all samples

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = TRUE,
                        output_dir = heatmap_dir,
                        color_test = sample_colors,
                        save_name = sect_title)
print(heatmap)

Heatmap of DE genes across only the samples used in the comparision

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = FALSE,
                        plot_groups = c(treatment, control),
                        color_test = sample_colors)
print(heatmap)

Gene Set Enrichment

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

evcode <- TRUE

result <- run_gprofiler_all(gene_table = sig_genes$DE_genes,
                            pos_name = treatment,
                            neg_name = control,
                            custom_bg = all_gene_ens,
                            save_dir = file.path(params$output_dir, "GSE_files"),
                            plot_dir = file.path(params$output_dir, "images",
                                                 "GSE"),
                            save_name = sect_title,
                            evcodes = evcode)

plots <- result$plots
gost_table <- result[[paste0(treatment, "_vs_", control)]]$result

save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG")

gost_file <- openxlsx::createWorkbook()

invisible(lapply(save_gost, function(x){
  save_table <- gost_table %>%
    dplyr::filter(source == x)
    
  if(evcode){
    save_table <- save_table %>%
      dplyr::select(!c(intersection, evidence_codes))

  }
  sheetname <- sub(":", "_", x)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}))

openxlsx::saveWorkbook(wb = gost_file,
                       file = paste0(params$output_dir, "/GSE_files/", 
                                     sect_title, 
                                     "_GSE.xlsx"),
                       overwrite = TRUE)

plot_grid(plots$GOBP, plots$GOMF, plots$GOCC, plots$KEGG,
          nrow = 4,
          ncol = 1,
          align = "v",
          axis = "tb")

Gene Set Enrichment Separating Up and Down Regulation

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

ifelse(!dir.exists(file.path(params$output_dir, "GSE_files_separated")),
       dir.create(file.path(params$output_dir, "GSE_files_separated")),
       FALSE)
[1] FALSE
result <- run_gprofiler(gene_table = sig_genes$DE_genes,
                        pos_name = treatment,
                        neg_name = control,
                        custom_bg = all_gene_ens,
                        save_dir = file.path(params$output_dir,
                                             "GSE_files_separated"),
                        plot_dir = file.path(params$output_dir, "images",
                                             "GSE"),
                        evcodes = FALSE)

plots <- result$plots
gost_table_trt <- result[[treatment]]$result
gost_table_ctl <- result[[control]]$result


save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG", "WP")

gost_file_trt <- openxlsx::createWorkbook()

gost_file_ctl <- openxlsx::createWorkbook()

add_gost_data <- function(gost_table, gost_file, source_test,
                          evcode = FALSE){
  save_table <- gost_table %>%
    dplyr::filter(source == source_test)
  
  if(evcode){
    save_table <- save_table %>%
          dplyr::select(!c(intersection, evidence_codes))
  }

  sheetname <- sub(":", "_", source_test)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}

invisible(lapply(save_gost, function(x){
  add_gost_data(gost_table_trt, gost_file_trt, x, evcode = FALSE)
  add_gost_data(gost_table_ctl, gost_file_ctl, x, evcode = FALSE)
}))

openxlsx::saveWorkbook(wb = gost_file_trt,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", treatment, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

openxlsx::saveWorkbook(wb = gost_file_ctl,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", control, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

plot_grid(plots$C_GOBP, plots$T_GOBP,
          plots$C_GOMF, plots$T_GOMF,
          plots$C_GOCC, plots$T_GOCC,
          plots$C_KEGG, plots$T_KEGG,
          nrow = 8,
          ncol = 1,
          align = "v",
          axis = "tb")

WT_trt

# Extract DE genes
if(interaction){
  sig_genes <- get_de(dds, de_test = de_test, interaction = interaction,
                      var1 = treatment, var2 = control,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, de_test = de_test,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  
  sig_genes_all <- get_de(dds, de_test = de_test, interaction = interaction,
                          var1 = treatment, var2 = control,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
} else {
  sig_genes <- get_de(dds, column = params$sample_column, var1 = treatment,
                      var2 = control, interaction = interaction,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, column = params$sample_column,
                               var1 = control, var2 = treatment,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  all_genes_all <- get_de(dds, column = params$sample_column, var1 = treatment,
                          var2 = control, interaction = interaction,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
}
DE_genes_n <- nrow(sig_genes$DE_genes)

DEseq2 analysis

12,347 differentially expressed genes were identified using the DESeq2 package.

Differentially expressed genes are labeled on the MA plot in blue.

plotMA(sig_genes$DE_df, main = sect_title)

Heatmaps

Heatmap of DE genes across all samples

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = TRUE,
                        output_dir = heatmap_dir,
                        color_test = sample_colors,
                        save_name = sect_title)
print(heatmap)

Heatmap of DE genes across only the samples used in the comparision

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = FALSE,
                        plot_groups = c(treatment, control),
                        color_test = sample_colors)
print(heatmap)

Gene Set Enrichment

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

evcode <- TRUE

result <- run_gprofiler_all(gene_table = sig_genes$DE_genes,
                            pos_name = treatment,
                            neg_name = control,
                            custom_bg = all_gene_ens,
                            save_dir = file.path(params$output_dir, "GSE_files"),
                            plot_dir = file.path(params$output_dir, "images",
                                                 "GSE"),
                            save_name = sect_title,
                            evcodes = evcode)

plots <- result$plots
gost_table <- result[[paste0(treatment, "_vs_", control)]]$result

save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG")

gost_file <- openxlsx::createWorkbook()

invisible(lapply(save_gost, function(x){
  save_table <- gost_table %>%
    dplyr::filter(source == x)
    
  if(evcode){
    save_table <- save_table %>%
      dplyr::select(!c(intersection, evidence_codes))

  }
  sheetname <- sub(":", "_", x)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}))

openxlsx::saveWorkbook(wb = gost_file,
                       file = paste0(params$output_dir, "/GSE_files/", 
                                     sect_title, 
                                     "_GSE.xlsx"),
                       overwrite = TRUE)

plot_grid(plots$GOBP, plots$GOMF, plots$GOCC, plots$KEGG,
          nrow = 4,
          ncol = 1,
          align = "v",
          axis = "tb")

Gene Set Enrichment Separating Up and Down Regulation

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

ifelse(!dir.exists(file.path(params$output_dir, "GSE_files_separated")),
       dir.create(file.path(params$output_dir, "GSE_files_separated")),
       FALSE)
[1] FALSE
result <- run_gprofiler(gene_table = sig_genes$DE_genes,
                        pos_name = treatment,
                        neg_name = control,
                        custom_bg = all_gene_ens,
                        save_dir = file.path(params$output_dir,
                                             "GSE_files_separated"),
                        plot_dir = file.path(params$output_dir, "images",
                                             "GSE"),
                        evcodes = FALSE)

plots <- result$plots
gost_table_trt <- result[[treatment]]$result
gost_table_ctl <- result[[control]]$result


save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG", "WP")

gost_file_trt <- openxlsx::createWorkbook()

gost_file_ctl <- openxlsx::createWorkbook()

add_gost_data <- function(gost_table, gost_file, source_test,
                          evcode = FALSE){
  save_table <- gost_table %>%
    dplyr::filter(source == source_test)
  
  if(evcode){
    save_table <- save_table %>%
          dplyr::select(!c(intersection, evidence_codes))
  }

  sheetname <- sub(":", "_", source_test)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}

invisible(lapply(save_gost, function(x){
  add_gost_data(gost_table_trt, gost_file_trt, x, evcode = FALSE)
  add_gost_data(gost_table_ctl, gost_file_ctl, x, evcode = FALSE)
}))

openxlsx::saveWorkbook(wb = gost_file_trt,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", treatment, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

openxlsx::saveWorkbook(wb = gost_file_ctl,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", control, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

plot_grid(plots$C_GOBP, plots$T_GOBP,
          plots$C_GOMF, plots$T_GOMF,
          plots$C_GOCC, plots$T_GOCC,
          plots$C_KEGG, plots$T_KEGG,
          nrow = 8,
          ncol = 1,
          align = "v",
          axis = "tb")

PTPN2_trt

# Extract DE genes
if(interaction){
  sig_genes <- get_de(dds, de_test = de_test, interaction = interaction,
                      var1 = treatment, var2 = control,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, de_test = de_test,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  
  sig_genes_all <- get_de(dds, de_test = de_test, interaction = interaction,
                          var1 = treatment, var2 = control,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
} else {
  sig_genes <- get_de(dds, column = params$sample_column, var1 = treatment,
                      var2 = control, interaction = interaction,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, column = params$sample_column,
                               var1 = control, var2 = treatment,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  all_genes_all <- get_de(dds, column = params$sample_column, var1 = treatment,
                          var2 = control, interaction = interaction,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
}
DE_genes_n <- nrow(sig_genes$DE_genes)

DEseq2 analysis

12,762 differentially expressed genes were identified using the DESeq2 package.

Differentially expressed genes are labeled on the MA plot in blue.

plotMA(sig_genes$DE_df, main = sect_title)

Heatmaps

Heatmap of DE genes across all samples

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = TRUE,
                        output_dir = heatmap_dir,
                        color_test = sample_colors,
                        save_name = sect_title)
print(heatmap)

Heatmap of DE genes across only the samples used in the comparision

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = FALSE,
                        plot_groups = c(treatment, control),
                        color_test = sample_colors)
print(heatmap)

Gene Set Enrichment

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

evcode <- TRUE

result <- run_gprofiler_all(gene_table = sig_genes$DE_genes,
                            pos_name = treatment,
                            neg_name = control,
                            custom_bg = all_gene_ens,
                            save_dir = file.path(params$output_dir, "GSE_files"),
                            plot_dir = file.path(params$output_dir, "images",
                                                 "GSE"),
                            save_name = sect_title,
                            evcodes = evcode)

plots <- result$plots
gost_table <- result[[paste0(treatment, "_vs_", control)]]$result

save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG")

gost_file <- openxlsx::createWorkbook()

invisible(lapply(save_gost, function(x){
  save_table <- gost_table %>%
    dplyr::filter(source == x)
    
  if(evcode){
    save_table <- save_table %>%
      dplyr::select(!c(intersection, evidence_codes))

  }
  sheetname <- sub(":", "_", x)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}))

openxlsx::saveWorkbook(wb = gost_file,
                       file = paste0(params$output_dir, "/GSE_files/", 
                                     sect_title, 
                                     "_GSE.xlsx"),
                       overwrite = TRUE)

plot_grid(plots$GOBP, plots$GOMF, plots$GOCC, plots$KEGG,
          nrow = 4,
          ncol = 1,
          align = "v",
          axis = "tb")

Gene Set Enrichment Separating Up and Down Regulation

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

ifelse(!dir.exists(file.path(params$output_dir, "GSE_files_separated")),
       dir.create(file.path(params$output_dir, "GSE_files_separated")),
       FALSE)
[1] FALSE
result <- run_gprofiler(gene_table = sig_genes$DE_genes,
                        pos_name = treatment,
                        neg_name = control,
                        custom_bg = all_gene_ens,
                        save_dir = file.path(params$output_dir,
                                             "GSE_files_separated"),
                        plot_dir = file.path(params$output_dir, "images",
                                             "GSE"),
                        evcodes = FALSE)

plots <- result$plots
gost_table_trt <- result[[treatment]]$result
gost_table_ctl <- result[[control]]$result


save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG", "WP")

gost_file_trt <- openxlsx::createWorkbook()

gost_file_ctl <- openxlsx::createWorkbook()

add_gost_data <- function(gost_table, gost_file, source_test,
                          evcode = FALSE){
  save_table <- gost_table %>%
    dplyr::filter(source == source_test)
  
  if(evcode){
    save_table <- save_table %>%
          dplyr::select(!c(intersection, evidence_codes))
  }

  sheetname <- sub(":", "_", source_test)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}

invisible(lapply(save_gost, function(x){
  add_gost_data(gost_table_trt, gost_file_trt, x, evcode = FALSE)
  add_gost_data(gost_table_ctl, gost_file_ctl, x, evcode = FALSE)
}))

openxlsx::saveWorkbook(wb = gost_file_trt,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", treatment, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

openxlsx::saveWorkbook(wb = gost_file_ctl,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", control, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

plot_grid(plots$C_GOBP, plots$T_GOBP,
          plots$C_GOMF, plots$T_GOMF,
          plots$C_GOCC, plots$T_GOCC,
          plots$C_KEGG, plots$T_KEGG,
          nrow = 8,
          ncol = 1,
          align = "v",
          axis = "tb")

PTPN2_WT_Trt

# Extract DE genes
if(interaction){
  sig_genes <- get_de(dds, de_test = de_test, interaction = interaction,
                      var1 = treatment, var2 = control,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, de_test = de_test,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  
  sig_genes_all <- get_de(dds, de_test = de_test, interaction = interaction,
                          var1 = treatment, var2 = control,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
} else {
  sig_genes <- get_de(dds, column = params$sample_column, var1 = treatment,
                      var2 = control, interaction = interaction,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, column = params$sample_column,
                               var1 = control, var2 = treatment,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  all_genes_all <- get_de(dds, column = params$sample_column, var1 = treatment,
                          var2 = control, interaction = interaction,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
}
DE_genes_n <- nrow(sig_genes$DE_genes)

DEseq2 analysis

659 differentially expressed genes were identified using the DESeq2 package.

Differentially expressed genes are labeled on the MA plot in blue.

plotMA(sig_genes$DE_df, main = sect_title)

Heatmaps

Heatmap of DE genes across all samples

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = TRUE,
                        output_dir = heatmap_dir,
                        color_test = sample_colors,
                        save_name = sect_title)
print(heatmap)

Heatmap of DE genes across only the samples used in the comparision

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = FALSE,
                        plot_groups = c(treatment, control),
                        color_test = sample_colors)
print(heatmap)

Gene Set Enrichment

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

evcode <- TRUE

result <- run_gprofiler_all(gene_table = sig_genes$DE_genes,
                            pos_name = treatment,
                            neg_name = control,
                            custom_bg = all_gene_ens,
                            save_dir = file.path(params$output_dir, "GSE_files"),
                            plot_dir = file.path(params$output_dir, "images",
                                                 "GSE"),
                            save_name = sect_title,
                            evcodes = evcode)

plots <- result$plots
gost_table <- result[[paste0(treatment, "_vs_", control)]]$result

save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG")

gost_file <- openxlsx::createWorkbook()

invisible(lapply(save_gost, function(x){
  save_table <- gost_table %>%
    dplyr::filter(source == x)
    
  if(evcode){
    save_table <- save_table %>%
      dplyr::select(!c(intersection, evidence_codes))

  }
  sheetname <- sub(":", "_", x)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}))

openxlsx::saveWorkbook(wb = gost_file,
                       file = paste0(params$output_dir, "/GSE_files/", 
                                     sect_title, 
                                     "_GSE.xlsx"),
                       overwrite = TRUE)

plot_grid(plots$GOBP, plots$GOMF, plots$GOCC, plots$KEGG,
          nrow = 4,
          ncol = 1,
          align = "v",
          axis = "tb")

Gene Set Enrichment Separating Up and Down Regulation

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

ifelse(!dir.exists(file.path(params$output_dir, "GSE_files_separated")),
       dir.create(file.path(params$output_dir, "GSE_files_separated")),
       FALSE)
[1] FALSE
result <- run_gprofiler(gene_table = sig_genes$DE_genes,
                        pos_name = treatment,
                        neg_name = control,
                        custom_bg = all_gene_ens,
                        save_dir = file.path(params$output_dir,
                                             "GSE_files_separated"),
                        plot_dir = file.path(params$output_dir, "images",
                                             "GSE"),
                        evcodes = FALSE)

plots <- result$plots
gost_table_trt <- result[[treatment]]$result
gost_table_ctl <- result[[control]]$result


save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG", "WP")

gost_file_trt <- openxlsx::createWorkbook()

gost_file_ctl <- openxlsx::createWorkbook()

add_gost_data <- function(gost_table, gost_file, source_test,
                          evcode = FALSE){
  save_table <- gost_table %>%
    dplyr::filter(source == source_test)
  
  if(evcode){
    save_table <- save_table %>%
          dplyr::select(!c(intersection, evidence_codes))
  }

  sheetname <- sub(":", "_", source_test)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}

invisible(lapply(save_gost, function(x){
  add_gost_data(gost_table_trt, gost_file_trt, x, evcode = FALSE)
  add_gost_data(gost_table_ctl, gost_file_ctl, x, evcode = FALSE)
}))

openxlsx::saveWorkbook(wb = gost_file_trt,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", treatment, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

openxlsx::saveWorkbook(wb = gost_file_ctl,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", control, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

plot_grid(plots$C_GOBP, plots$T_GOBP,
          plots$C_GOMF, plots$T_GOMF,
          plots$C_GOCC, plots$T_GOCC,
          plots$C_KEGG, plots$T_KEGG,
          nrow = 8,
          ncol = 1,
          align = "v",
          axis = "tb")

PTPN2_specific_trt

# Extract DE genes
if(interaction){
  sig_genes <- get_de(dds, de_test = de_test, interaction = interaction,
                      var1 = treatment, var2 = control,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, de_test = de_test,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  
  sig_genes_all <- get_de(dds, de_test = de_test, interaction = interaction,
                          var1 = treatment, var2 = control,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
} else {
  sig_genes <- get_de(dds, column = params$sample_column, var1 = treatment,
                      var2 = control, interaction = interaction,
                      write_csv = T, p_value = params$DE_alpha,
                      lfc = 0, output_dir = params$output_dir,
                      lfc_shrink = T, save_name = sect_title)

  sig_genes_high_lfc <- get_de(dds, column = params$sample_column,
                               var1 = control, var2 = treatment,
                               interaction = interaction, write_csv = F,
                               p_value = params$DE_alpha,
                               lfc = params$DE_lfc,
                               lfc_shrink = T)
  all_genes_all <- get_de(dds, column = params$sample_column, var1 = treatment,
                          var2 = control, interaction = interaction,
                          write_csv = T, p_value = 1,
                          lfc = 0, output_dir = params$output_dir,
                          lfc_shrink = T, save_name = paste0(sect_title, "_all"))
}
DE_genes_n <- nrow(sig_genes$DE_genes)

DEseq2 analysis

181 differentially expressed genes were identified using the DESeq2 package.

Differentially expressed genes are labeled on the MA plot in blue.

plotMA(sig_genes$DE_df, main = sect_title)

Heatmaps

Heatmap of DE genes across all samples

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = TRUE,
                        output_dir = heatmap_dir,
                        color_test = sample_colors,
                        save_name = sect_title)
print(heatmap)

Heatmap of DE genes across only the samples used in the comparision

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
heatmap <- make_heatmap(dds = dds,
                        vsd = vsd,
                        de_genes = sig_genes$DE_genes,
                        treatment = treatment,
                        control = control,
                        group = params$sample_column,
                        print_genenames = FALSE,
                        cluster_cols = FALSE,
                        save_heatmap = FALSE,
                        plot_groups = c(treatment, control),
                        color_test = sample_colors)
print(heatmap)

Gene Set Enrichment

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

evcode <- TRUE

result <- run_gprofiler_all(gene_table = sig_genes$DE_genes,
                            pos_name = treatment,
                            neg_name = control,
                            custom_bg = all_gene_ens,
                            save_dir = file.path(params$output_dir, "GSE_files"),
                            plot_dir = file.path(params$output_dir, "images",
                                                 "GSE"),
                            save_name = sect_title,
                            evcodes = evcode)

plots <- result$plots
gost_table <- result[[paste0(treatment, "_vs_", control)]]$result

save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG")

gost_file <- openxlsx::createWorkbook()

invisible(lapply(save_gost, function(x){
  save_table <- gost_table %>%
    dplyr::filter(source == x)
    
  if(evcode){
    save_table <- save_table %>%
      dplyr::select(!c(intersection, evidence_codes))

  }
  sheetname <- sub(":", "_", x)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}))

openxlsx::saveWorkbook(wb = gost_file,
                       file = paste0(params$output_dir, "/GSE_files/", 
                                     sect_title, 
                                     "_GSE.xlsx"),
                       overwrite = TRUE)

plot_grid(plots$GOBP, plots$GOMF, plots$GOCC, plots$KEGG,
          nrow = 4,
          ncol = 1,
          align = "v",
          axis = "tb")

Gene Set Enrichment Separating Up and Down Regulation

For Gene Set Enrichment, I used gprofiler2 It uses a hypergeometric test to determine overrepresentation of genes from different categories.

all_gene_ens <- sub(".*_ENSMUSG", "ENSMUSG", rownames(dds))

ifelse(!dir.exists(file.path(params$output_dir, "GSE_files_separated")),
       dir.create(file.path(params$output_dir, "GSE_files_separated")),
       FALSE)
[1] FALSE
result <- run_gprofiler(gene_table = sig_genes$DE_genes,
                        pos_name = treatment,
                        neg_name = control,
                        custom_bg = all_gene_ens,
                        save_dir = file.path(params$output_dir,
                                             "GSE_files_separated"),
                        plot_dir = file.path(params$output_dir, "images",
                                             "GSE"),
                        evcodes = FALSE)

plots <- result$plots
gost_table_trt <- result[[treatment]]$result
gost_table_ctl <- result[[control]]$result


save_gost <- c("GO:BP", "GO:CC", "GO:MF", "KEGG", "WP")

gost_file_trt <- openxlsx::createWorkbook()

gost_file_ctl <- openxlsx::createWorkbook()

add_gost_data <- function(gost_table, gost_file, source_test,
                          evcode = FALSE){
  save_table <- gost_table %>%
    dplyr::filter(source == source_test)
  
  if(evcode){
    save_table <- save_table %>%
          dplyr::select(!c(intersection, evidence_codes))
  }

  sheetname <- sub(":", "_", source_test)
  openxlsx::addWorksheet(wb = gost_file, sheetName = sheetname)
  openxlsx::writeData(wb = gost_file, sheet = sheetname, x = save_table)
}

invisible(lapply(save_gost, function(x){
  add_gost_data(gost_table_trt, gost_file_trt, x, evcode = FALSE)
  add_gost_data(gost_table_ctl, gost_file_ctl, x, evcode = FALSE)
}))

openxlsx::saveWorkbook(wb = gost_file_trt,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", treatment, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

openxlsx::saveWorkbook(wb = gost_file_ctl,
                       file = file.path(params$output_dir, "GSE_files_separated", 
                                     paste0(sect_title, "_", control, 
                                     "_upregulated_genes_GSE.xlsx")),
                       overwrite = TRUE)

plot_grid(plots$C_GOBP, plots$T_GOBP,
          plots$C_GOMF, plots$T_GOMF,
          plots$C_GOCC, plots$T_GOCC,
          plots$C_KEGG, plots$T_KEGG,
          nrow = 8,
          ncol = 1,
          align = "v",
          axis = "tb")

Gene plots

Gene plots show the normalized expression of key genes across all samples. We can add in any genes that you would like to see in this format.

Ins1

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Ins2

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Gbp2

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Gbp5

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Cxcl1

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Pomc

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Chl1

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Npy

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Il33

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Robo1

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Rnasel

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Gbp8

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Serpina3i

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Raet1e

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Ggt1

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Apol7a

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Pcyt1b

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Rps3a1

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Bmp7

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Mapk13

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Sec22c

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Clu

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Pink1

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Lrrtm4

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Mecom

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Cnnm2

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Serpina3n

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Perp

all_gene_names <- rownames(dds)
all_gene_ids <- sub("_ENSMUSG.*", "", all_gene_names)
plot <- plot_genes(gene_id = gene,
                   gene_id_list = all_gene_ids,
                   deseq_obj = dds,
                   intgroup = params$sample_column,
                   plot_ggplot = TRUE,
                   color = sample_colors,
                   return_data = FALSE,
                   print = TRUE,
                   save_path = file.path(params$output_dir, "images", "gene_plots"))

Heatmaps

Cytokine treated samples

gene_lists <- openxlsx::getSheetNames(params$cytokine_treated_gene_list)
gene_file <- params$cytokine_treated_gene_list

control <- "WT_Cytokine_Treatment"
treatment <- "PTPN2_KO_Cytokine_Treatment"

heatmap_chunks <- gene_lists %>%
  map(~knit_expand("src/rules/scripts/heatmap_template.Rmd"))

Calcium and Potassium

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Enzymes

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Solute carrier proteins

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Lori green

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Lori yellow

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Lori Blue

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Non cytokine treated samples

gene_lists <- openxlsx::getSheetNames(params$untreated_gene_list)
gene_file <- params$untreated_gene_list

control <- "WT_NoTreatment"
treatment <- "PTPN2_KO_NoTreatment"

heatmap_chunks <- gene_lists %>%
  map(~knit_expand("src/rules/scripts/heatmap_template.Rmd"))

Protein metabolism genes

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Solute carrier protein genes

Heatmap of DE genes across all samples

heatmap_1 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          color_test = sample_colors)
print(heatmap_1)

Heatmap of DE genes across only the samples used in the comparision

heatmap_2 <- make_heatmap(dds = dds,
                          vsd = vsd,
                          de_genes = all_gene_ids,
                          treatment = treatment,
                          control = control,
                          group = params$sample_column,
                          print_genenames = TRUE,
                          cluster_cols = FALSE,
                          save_heatmap = FALSE,
                          plot_groups = c(treatment, control),
                          color_test = sample_colors)
print(heatmap_2)

heatmap_dir <- file.path(params$output_dir, "images", "heatmaps")
gene_list_print <- sub(" ", "_", gene_list)
save_name <- paste0(gene_list_print, "_", treatment, "_", control, ".pdf")
pdf(file.path(params$output_dir, "images", "heatmaps", save_name),
    height = figure_height, width = 10)
grid::grid.newpage()
grid::grid.draw(heatmap_1$gtable)
grid::grid.newpage()
grid::grid.draw(heatmap_2$gtable)
dev.off()

Session Info

sessionInfo()
R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] org.Hs.eg.db_3.12.0         AnnotationDbi_1.52.0       
 [3] KEGGREST_1.30.1             pathview_1.30.1            
 [5] DESeq2_1.30.0               SummarizedExperiment_1.20.0
 [7] Biobase_2.50.0              MatrixGenerics_1.2.1       
 [9] matrixStats_0.58.0          GenomicRanges_1.42.0       
[11] GenomeInfoDb_1.26.2         IRanges_2.24.1             
[13] S4Vectors_0.28.1            BiocGenerics_0.36.0        
[15] openxlsx_4.2.3              apeglm_1.12.0              
[17] ashr_2.2-47                 RColorBrewer_1.1-2         
[19] viridis_0.5.1               viridisLite_0.3.0          
[21] gprofiler2_0.2.0            devtools_2.3.2             
[23] usethis_2.0.0               ggrepel_0.9.1              
[25] pheatmap_1.0.12             Matrix_1.3-3               
[27] forcats_0.5.1               stringr_1.4.0              
[29] dplyr_1.0.3                 purrr_0.3.4                
[31] readr_1.4.0                 tidyr_1.1.2                
[33] tibble_3.0.6                ggplot2_3.3.3              
[35] tidyverse_1.3.0             cowplot_1.1.1              
[37] BiocManager_1.30.10         knitr_1.31                 

loaded via a namespace (and not attached):
  [1] readxl_1.3.1           backports_1.2.1        plyr_1.8.6            
  [4] lazyeval_0.2.2         splines_4.0.3          BiocParallel_1.24.1   
  [7] digest_0.6.27          invgamma_1.1           htmltools_0.5.1.1     
 [10] SQUAREM_2021.1         magrittr_2.0.1         memoise_2.0.0         
 [13] remotes_2.3.0          Biostrings_2.58.0      annotate_1.68.0       
 [16] modelr_0.1.8           bdsmatrix_1.3-4        prettyunits_1.1.1     
 [19] colorspace_2.0-0       blob_1.2.1             rvest_0.3.6           
 [22] haven_2.3.1            xfun_0.27              callr_3.5.1           
 [25] crayon_1.4.0           RCurl_1.98-1.2         jsonlite_1.7.2        
 [28] graph_1.68.0           genefilter_1.72.1      survival_3.2-11       
 [31] glue_1.4.2             gtable_0.3.0           zlibbioc_1.36.0       
 [34] XVector_0.30.0         DelayedArray_0.16.1    pkgbuild_1.2.0        
 [37] Rgraphviz_2.34.0       scales_1.1.1           mvtnorm_1.1-1         
 [40] DBI_1.1.1              Rcpp_1.0.7             xtable_1.8-4          
 [43] emdbook_1.3.12         bit_4.0.4              truncnorm_1.0-8       
 [46] htmlwidgets_1.5.3      httr_1.4.2             ellipsis_0.3.1        
 [49] farver_2.0.3           pkgconfig_2.0.3        XML_3.99-0.5          
 [52] sass_0.3.1             dbplyr_2.0.0           locfit_1.5-9.4        
 [55] labeling_0.4.2         tidyselect_1.1.0       rlang_0.4.10          
 [58] munsell_0.5.0          cellranger_1.1.0       tools_4.0.3           
 [61] cachem_1.0.1           cli_2.3.0              generics_0.1.0        
 [64] RSQLite_2.2.3          broom_0.7.4            evaluate_0.14         
 [67] fastmap_1.1.0          yaml_2.2.1             processx_3.4.5        
 [70] bit64_4.0.5            fs_1.5.0               zip_2.1.1             
 [73] KEGGgraph_1.50.0       xml2_1.3.2             compiler_4.0.3        
 [76] rstudioapi_0.13        png_0.1-7              plotly_4.9.3          
 [79] testthat_3.0.1         reprex_1.0.0           geneplotter_1.68.0    
 [82] bslib_0.2.4            stringi_1.5.3          highr_0.8             
 [85] ps_1.5.0               desc_1.2.0             lattice_0.20-44       
 [88] vctrs_0.3.6            pillar_1.4.7           lifecycle_0.2.0       
 [91] jquerylib_0.1.3        data.table_1.13.6      bitops_1.0-6          
 [94] irlba_2.3.3            R6_2.5.0               gridExtra_2.3         
 [97] sessioninfo_1.1.1      MASS_7.3-54            assertthat_0.2.1      
[100] pkgload_1.1.0          rprojroot_2.0.2        withr_2.4.1           
[103] GenomeInfoDbData_1.2.4 hms_1.0.0              grid_4.0.3            
[106] coda_0.19-4            rmarkdown_2.11         mixsqp_0.3-43         
[109] bbmle_1.0.23.1         numDeriv_2016.8-1.1    lubridate_1.8.0       
ifelse(!dir.exists(file.path(params$output_dir, "objs")),
       dir.create(file.path(params$output_dir, "objs")), FALSE)
[1] FALSE
saveRDS(dds, paste0(params$output_dir, "/objs/dds_obj.rda"))

saveRDS(vsd, paste0(params$output_dir, "/objs/vsd_obj.rda"))